def _build_config_2_0():
    """Assemble the settings dictionary published as ``CONFIG_2_0``.

    Every top-level section is created as its own local dict and the pieces
    are then combined, in a fixed key order, into the returned mapping.
    """
    # Values stored under "model_config".
    model_section = {
        "image_size": 64,
        "num_channels": 384,
        "num_res_blocks": 3,
        "channel_mult": "",
        "num_heads": 1,
        "num_head_channels": 64,
        "num_heads_upsample": -1,
        "attention_resolutions": "32,16,8",
        "dropout": 0,
        "model_dim": 768,
        "use_scale_shift_norm": True,
        "resblock_updown": True,
        "use_fp16": False,
        "cache_text_emb": True,
        "text_encoder_in_dim1": 1024,
        "text_encoder_in_dim2": 640,
        "pooling_type": "from_model",
        "in_channels": 4,
        "out_channels": 8,
        "up": False,
        "inpainting": False,
    }

    # Values stored under "diffusion_config": 1000 steps, "linear" schedule.
    diffusion_section = {
        "learn_sigma": True,
        "sigma_small": False,
        "steps": 1000,
        "noise_schedule": "linear",
        "timestep_respacing": "",
        "use_kl": False,
        "predict_xstart": False,
        "rescale_timesteps": True,
        "rescale_learned_sigmas": True,
        "linear_start": 0.0001,
        "linear_end": 0.02,
    }

    # Innermost "ddconfig" mapping of the image encoder entry.
    autoencoder_ddconfig = {
        "double_z": True,
        "z_channels": 4,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 2, 4, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [],
        "dropout": 0.0,
    }

    # Image encoder entry named "AutoencoderKL"; here "ckpt_path" lives
    # inside "params" and is an empty string.
    image_encoder_section = {
        "name": "AutoencoderKL",
        "scale": 0.0512,
        "params": {
            "ckpt_path": "",
            "embed_dim": 4,
            "ddconfig": autoencoder_ddconfig,
        },
    }

    # Two text encoder entries; their model paths are empty strings here.
    first_text_encoder = {
        "model_path": "",
        "model_name": "multiclip",
    }
    second_text_encoder = {
        "model_path": "",
        "model_name": "MT5EncoderModel",
    }

    # Combine the sections; the tokenizer names are empty strings as well.
    return {
        "model_config": model_section,
        "diffusion_config": diffusion_section,
        "image_enc_params": image_encoder_section,
        "text_enc_params1": first_text_encoder,
        "text_enc_params2": second_text_encoder,
        "tokenizer_name1": "",
        "tokenizer_name2": "",
    }


CONFIG_2_0 = _build_config_2_0()

def _build_config_2_1():
    """Assemble the settings dictionary published as ``CONFIG_2_1``.

    Every nested section is created as its own local dict and the pieces are
    then combined, in a fixed key order, into the returned mapping.
    """
    # Innermost "ddconfig" mapping of the image encoder entry.
    movq_ddconfig = {
        "double_z": False,
        "z_channels": 4,
        "resolution": 256,
        "in_channels": 3,
        "out_ch": 3,
        "ch": 128,
        "ch_mult": [1, 2, 2, 4],
        "num_res_blocks": 2,
        "attn_resolutions": [32],
        "dropout": 0.0,
    }

    # Image encoder entry named "MOVQ"; "ckpt_path" is a sibling of "params"
    # in this mapping and is an empty string.
    image_encoder_section = {
        "name": "MOVQ",
        "scale": 1,
        "ckpt_path": "",
        "params": {
            "embed_dim": 4,
            "n_embed": 16384,
            "ddconfig": movq_ddconfig,
        },
    }

    # Single text encoder entry with its in/out feature sizes.
    text_encoder_section = {
        "model_path": "",
        "model_name": "multiclip",
        "in_features": 1024,
        "out_features": 768,
    }

    # "prior" section: a model description plus its own diffusion settings
    # (1000 steps, "cosine" schedule).
    prior_model = {
        "type": "prior",
        "diffusion_sampler": "uniform",
        "hparams": {
            "text_ctx": 77,
            "xf_width": 2048,
            "xf_layers": 20,
            "xf_heads": 32,
            "xf_final_ln": True,
            "xf_padding": False,
            "text_drop": 0.2,
            "clip_dim": 768,
            "clip_xf_width": 768,
        },
    }
    prior_diffusion = {
        "steps": 1000,
        "learn_sigma": False,
        "sigma_small": True,
        "noise_schedule": "cosine",
        "use_kl": False,
        "predict_xstart": True,
        "rescale_learned_sigmas": False,
        "timestep_respacing": "",
    }
    prior_section = {
        "clip_mean_std_path": "ViT-L-14_stats.th",
        "params": {
            "model": prior_model,
            "diffusion": prior_diffusion,
        },
    }

    # Values stored under "model_config"; carries an explicit "version" tag.
    model_section = {
        "version": "2.1",
        "image_size": 64,
        "num_channels": 384,
        "num_res_blocks": 3,
        "channel_mult": "",
        "num_heads": 1,
        "num_head_channels": 64,
        "num_heads_upsample": -1,
        "attention_resolutions": "32,16,8",
        "dropout": 0,
        "model_dim": 768,
        "use_scale_shift_norm": True,
        "resblock_updown": True,
        "use_fp16": True,
        "cache_text_emb": True,
        "text_encoder_in_dim1": 1024,
        "text_encoder_in_dim2": 768,
        "image_encoder_in_dim": 768,
        "num_image_embs": 10,
        "pooling_type": "from_model",
        "in_channels": 4,
        "out_channels": 8,
        "use_flash_attention": False,
    }

    # Values stored under "diffusion_config": 1000 steps, "linear" schedule.
    diffusion_section = {
        "learn_sigma": True,
        "sigma_small": False,
        "steps": 1000,
        "noise_schedule": "linear",
        "timestep_respacing": "",
        "use_kl": False,
        "predict_xstart": False,
        "rescale_timesteps": True,
        "rescale_learned_sigmas": True,
        "linear_start": 0.00085,
        "linear_end": 0.012,
    }

    # Combine the sections; the tokenizer name is an empty string here.
    return {
        "clip_name": "ViT-L/14",
        "clip_image_size": 224,
        "tokenizer_name": "",
        "image_enc_params": image_encoder_section,
        "text_enc_params": text_encoder_section,
        "prior": prior_section,
        "model_config": model_section,
        "diffusion_config": diffusion_section,
    }


CONFIG_2_1 = _build_config_2_1()